This markdown contains data from the Cech Annotation project for Ashwagandha. Samples were preprocessed in Progenesis by Luke Marney and de novo annotation was conducted using the SIRIUS suite of software by Yanni Bouranis. All data presented is DIA.
All Ashwagandha samples were run through Progenesis first. No filter was used to narrow down features:
library(tidyverse)
library(DT)
library(ggfortify)
# PCA of the unfiltered Progenesis export.
# The first two lines of the CSV are skipped, `Compound` becomes the row names,
# and only the columns at positions 11-16 of the file are kept.
ProgFile <- read_csv('~/Desktop/CechData/220413_QTOF_POS_DIA_RAW.csv', skip = 2) %>%
  dplyr::select(Compound, 11:16) %>%
  column_to_rownames('Compound')

# Transpose so that each selected column becomes one row (one PCA observation)
# and each compound becomes a variable.
ProgT <- as.data.frame(t(ProgFile))
PCA <- prcomp(ProgT, scale. = TRUE, center = TRUE)

# NOTE(review): every sample name is listed twice. Presumably columns 11:16
# hold two abundance blocks for the same three injections -- confirm that
# running the PCA on both blocks together is intended.
filenames <- c('Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C', 'Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C')
PCi <- data.frame(PCA$x,
                  filename = filenames)

# Percent of variance explained by each component, taken from the fit itself
# so the axis labels cannot drift out of sync with the data.
PCAvar <- round(100 * PCA$sdev^2 / sum(PCA$sdev^2), 2)

ggplot(PCi, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCAvar[1], '%')) +
  ylab(paste0('PC2 ', PCAvar[2], '%'))
# Read CanopusDIAOutput.csv, replace every space in the column names with an
# underscore, and drop the `all_classifications` column.
CanopusAll <- read_csv('~/Desktop/CechData/CanopusDIAOutput.csv')
CanopusAll <- rename_with(CanopusAll, function(nm) gsub(' ', '_', nm))
CanopusAll <- dplyr::select(CanopusAll, -all_classifications)

# Interactive table: per-column filters on top, 12 rows per page,
# horizontal scrolling, and the Buttons extension enabled.
datatable(
  CanopusAll,
  extensions = 'Buttons',
  filter = 'top',
  options = list(
    dom = 'Bfrtip',
    pageLength = 12,
    scrollX = TRUE
  )
)
To simplify visual inspection of the classes, any class with fewer than 3 annotated metabolites was binned into the “other” category.
# Number of rows per `level_5` value. Rows containing NA are dropped, and any
# class with fewer than 3 rows is relabelled 'other' in `bincat`.
DIAF <- CanopusAll %>%
  count(level_5, name = 'n') %>%
  drop_na() %>%
  mutate(bincat = ifelse(n < 3, 'other', level_5))

# Bar chart of counts per binned class, with vertical x-axis labels.
Dall <- ggplot(data = DIAF, mapping = aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Interactive version of the chart.
plotly::ggplotly(Dall, width = 900, height = 600)
# Same bar chart as for DIAF, but with the 'Amino acids and derivatives' and
# 'Peptides' bins removed.
DIAFfilt <- dplyr::filter(
  DIAF,
  !(bincat %in% c('Amino acids and derivatives', 'Peptides'))
)

DF <- ggplot(data = DIAFfilt, mapping = aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Interactive version of the chart.
plotly::ggplotly(DF, width = 900, height = 600)
The blanks looked noisy so we built a new Progenesis run with both blanks and the extracts. Features which were more abundant in the blanks were filtered out. This significantly decreased the size of our dataset from ~8000 features to ~200 features.
# PCA of the blank-filtered Progenesis rerun.
# The first two lines of the CSV are skipped, `Compound` becomes the row names,
# and only the columns at positions 16-33 of the file are kept.
ProgFileFilt <- read_csv('~/Desktop/CechData/rerun/220420_FINAL_QTOF_POS_DIA.csv', skip = 2) %>%
  dplyr::select(Compound, 16:33) %>%
  column_to_rownames('Compound')

# Transpose so that each selected column becomes one row (one PCA observation)
# and each compound becomes a variable.
ProgTF <- as.data.frame(t(ProgFileFilt))
PCAF <- prcomp(ProgTF, scale. = TRUE, center = TRUE)

filenamesFilt <- c('Qtof_P_E3_EXBLANKA', 'Qtof_P_E3_EXBLANKB', 'Qtof_P_E3_EXBLANKC', 'Qtof_P_E3_Wasteblank4', 'Qtof_P_E3_Wasteblank5', 'Qtof_P_E3_Wasteblank6', 'Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C')

# Positions 16:33 span 18 columns but only 9 names are listed, so the original
# data.frame() call silently recycled the names. The recycling is now explicit
# and guarded, so a column/name mismatch fails with a clear message.
# NOTE(review): presumably the selection holds two abundance blocks for the
# same nine injections -- confirm that this is intended.
stopifnot(nrow(PCAF$x) %% length(filenamesFilt) == 0)
PCiF <- data.frame(PCAF$x,
                   filename = rep(filenamesFilt, length.out = nrow(PCAF$x)))

# Percent of variance explained by each component, taken from the fit itself
# so the axis labels cannot drift out of sync with the data.
PCAFvar <- round(100 * PCAF$sdev^2 / sum(PCAF$sdev^2), 2)

ggplot(PCiF, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCAFvar[1], '%')) +
  ylab(paste0('PC2 ', PCAFvar[2], '%'))
Our data is already skewed toward showing what we want, since it has been filtered down to only the features that are most abundant in the Ashwagandha extracts. Even so, we see clear separation of the extracts from the blanks.
# Read FilteredDIA.csv, replace every space in the column names with an
# underscore, and drop the `all_classifications` column.
CanopusFilt <- read_csv('~/Desktop/CechData/rerun/FilteredDIA.csv') %>%
  rename_with(~gsub(' ', '_', .x)) %>%
  dplyr::select(-all_classifications)

# Interactive table: per-column filters on top, 12 rows per page, horizontal
# scrolling. The 'B' in `dom` asks for the Buttons control, which is only
# rendered when the Buttons extension is loaded, so it is enabled here to
# match the unfiltered table.
datatable(CanopusFilt,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 12,
                         dom = 'Bfrtip',
                         scrollX = TRUE))
# Number of rows per `level_5` value in the filtered table. Rows containing NA
# are dropped, and any class with fewer than 3 rows is relabelled 'other' in
# `bincat`.
DIAFilterd <- CanopusFilt %>%
  group_by(level_5) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  drop_na() %>%
  mutate(bincat = ifelse(n < 3, 'other', level_5))

# Plot the binned category. The original mapped `level_5` here, which left the
# `bincat` column computed above unused and made this chart inconsistent with
# the unfiltered one.
Dfl <- ggplot(DIAFilterd, aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Interactive version of the chart.
plotly::ggplotly(Dfl, width = 900, height = 600)
By filtering our dataset it looks like we lost a lot of metabolites that were originally annotated as withanolides. It will be important to analyze our blanks next to see what we are annotating in them.
# PCA of the blanks-only Progenesis run.
# The first two lines of the CSV are skipped, `Compound` becomes the row names,
# and only the columns at positions 16-27 of the file are kept.
ProgBlanks <- read_csv('~/Desktop/CechData/rerun/blanks/BlanksRawPos.csv', skip = 2) %>%
  dplyr::select(Compound, 16:27) %>%
  column_to_rownames('Compound')

# Transpose so that each selected column becomes one row (one PCA observation)
# and each compound becomes a variable.
ProgB <- as.data.frame(t(ProgBlanks))
PCAB <- prcomp(ProgB, scale. = TRUE, center = TRUE)

filenamesB <- c('Qtof_P_E3_EXBLANKA', 'Qtof_P_E3_EXBLANKB', 'Qtof_P_E3_EXBLANKC', 'Qtof_P_E3_Wasteblank4', 'Qtof_P_E3_Wasteblank5', 'Qtof_P_E3_Wasteblank6')

# Positions 16:27 span 12 columns but only 6 names are listed, so the original
# data.frame() call silently recycled the names. The recycling is now explicit
# and guarded, so a column/name mismatch fails with a clear message.
# NOTE(review): presumably the selection holds two abundance blocks for the
# same six injections -- confirm that this is intended.
stopifnot(nrow(PCAB$x) %% length(filenamesB) == 0)
PCiB <- data.frame(PCAB$x,
                   filename = rep(filenamesB, length.out = nrow(PCAB$x)))

# Percent of variance explained by each component, taken from the fit itself
# so the axis labels cannot drift out of sync with the data.
PCABvar <- round(100 * PCAB$sdev^2 / sum(PCAB$sdev^2), 2)

ggplot(PCiB, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCABvar[1], '%')) +
  ylab(paste0('PC2 ', PCABvar[2], '%'))